import re

from prowler.lib.check.models import Check, Check_Report_AWS
from prowler.lib.logger import logger
from prowler.providers.aws.services.s3.s3_client import s3_client


class s3_bucket_shadow_resource_vulnerability(Check):
    """Detect S3 "shadow resource" buckets with predictable, service-generated names.

    Several AWS services (Glue, SageMaker, EMR Studio, CodeStar, ...) create
    helper buckets whose names are derived deterministically from the account ID
    and region. An attacker who pre-claims such a name in their own account can
    intercept data the service later writes ("Bucket Monopoly" attack).

    The check runs in two phases:
      1. For every bucket in the audited account, verify that any bucket whose
         name matches a known predictable pattern is actually owned by the
         audited account (canonical-ID comparison).
      2. For every audited region, probe (via HeadBucket) whether a bucket with
         a predictable name already exists in *another* account — i.e. the name
         has been squatted.

    Returns:
        list[Check_Report_AWS]: one report per examined bucket / probed name.
    """

    def execute(self):
        findings = []
        account = s3_client.provider.identity.account
        # Predictable bucket name patterns from the research article
        # These patterns are used by AWS services and can be claimed by attackers.
        # Values are regex templates; "<region>" is substituted per region.
        predictable_patterns = {
            "Glue": f"aws-glue-assets-{account}-<region>",
            "SageMaker": f"sagemaker-<region>-{account}",
            # "CloudFormation": "cf-templates-.*-<region>",
            "EMR": f"aws-emr-studio-{account}-<region>",
            "CodeStar": f"aws-codestar-<region>-{account}",
            # Add other patterns here as they are discovered
        }

        # Track buckets we've already reported to avoid duplicates
        reported_buckets = set()

        # Phase 1: check buckets already present in the current account
        for bucket in s3_client.buckets.values():
            report = Check_Report_AWS(self.metadata(), resource=bucket)
            report.region = bucket.region
            report.resource_id = bucket.name
            report.resource_arn = bucket.arn
            report.resource_tags = bucket.tags
            report.status = "PASS"
            report.status_extended = (
                f"S3 bucket {bucket.name} is not a known shadow resource."
            )

            # Check if this bucket matches any predictable pattern
            for service, pattern_format in predictable_patterns.items():
                pattern = pattern_format.replace("<region>", bucket.region)

                # fullmatch (not match) so a bucket that merely starts with a
                # predictable prefix (e.g. "...-us-east-1-extra") is not a
                # false positive — the whole name must match the pattern.
                if re.fullmatch(pattern, bucket.name):
                    if bucket.owner_id != s3_client.audited_canonical_id:
                        report.status = "FAIL"
                        report.status_extended = f"S3 bucket {bucket.name} for service {service} is a known shadow resource and it is owned by another account ({bucket.owner_id})."
                    else:
                        report.status = "PASS"
                        report.status_extended = f"S3 bucket {bucket.name} for service {service} is a known shadow resource but it is correctly owned by the audited account."
                    break
            findings.append(report)
            reported_buckets.add(bucket.name)

        # Phase 2: probe predictable names per region to detect names already
        # squatted in other accounts.
        regions_to_test = (
            s3_client.provider.identity.audited_regions
            or s3_client.regional_clients.keys()
        )

        for region in regions_to_test:
            for service, pattern_format in predictable_patterns.items():
                # Generate the concrete bucket name for this region.
                # NOTE(review): this assumes the active patterns are literal
                # strings (no regex metacharacters) — true for the current set;
                # a wildcard pattern like the commented CloudFormation one
                # cannot be probed this way.
                bucket_name = pattern_format.replace("<region>", region)

                # Skip if we've already reported this bucket
                if bucket_name in reported_buckets:
                    continue

                logger.info(
                    f"Checking if shadow resource bucket {bucket_name} exists in other accounts"
                )
                # HeadBucket succeeds (or returns 403) when the name exists;
                # since it wasn't in our inventory, another account owns it.
                if s3_client._head_bucket(bucket_name):
                    report = Check_Report_AWS(self.metadata(), resource={})
                    report.region = region
                    report.resource_id = bucket_name
                    report.resource_arn = (
                        f"arn:{s3_client.audited_partition}:s3:::{bucket_name}"
                    )
                    report.resource_tags = []
                    report.status = "FAIL"
                    report.status_extended = f"S3 bucket {bucket_name} for service {service} is a known shadow resource that exists and is owned by another account."

                    findings.append(report)
                    reported_buckets.add(bucket_name)

        return findings
